1 Importations et transformation des données

1.1 Importations

# Packages
library(dplyr)              # tidyverse
library(foreign)            # read.dbf
library(lubridate)      # dates
library(data.table)

# Graphes packages
library(ggplot2) ; ggplot2::theme_set(theme_light())
library(ggmap)
library(viridis)
library(ggpubr)
library(plotly)

# Packages calcul
library(Distance)
library(dsm)

# Packages raster/carto
library(sp)
library(rgdal)
library(raster)
load("../data/effort_output.RData")
load("../data/list_prepare_obs_by_sp.RData")
load("../data/predata_output.RData")
gridata <- read.dbf("../data/SPEE_CAPECET_Grid2km_modified.dbf")
# On importe les fonctions pour sélectionner les covariables (fonction de détection + dsm)
source("./fonctions/selec_detfc_aic.R")
source("./fonctions/selec_dsm_aic_fwd.R")

# On importe la fonction pred_splines
source("./fonctions/pred_splines.R")

# On importe les fonctions get_map_abundance
source("./fonctions/get_map_abundance.R")
source("./fonctions/get_map_abundance_extr.R")

1.2 Sélection des données

# Jointure
distdata <- dplyr::left_join(list_prepare_obs_by_sp$PRIGLA_obs_output$obsdata,
                                                                     cov_segment <- predata_output$segdata,
                                                                     by = "Seg")

# Réarrangement des colonnes
distdata <- distdata[, c(3, 5:11, 1:2, 14:31)]
colnames(distdata)[1] = "Transect.Label"
colnames(distdata)[2] = "Seg"
colnames(distdata)[3] = "Sample.Label"
colnames(distdata)[5] = "session"


distdata$seaState = as.integer(distdata$seaState)
distdata$observerId <- as.integer(distdata$observerId)

# Changement de l'ordre des colonnes pour garder le même ordre pour les covariables que segdata
distdata <- distdata[, c(1:19, 24:28, 20:23)]
obsdata <- list_prepare_obs_by_sp$PRIGLA_obs_output$obsdata
segdata <- effort_output$segdata
predata <- predata_output$predata
predata$session <- factor(predata$session)
unique(predata$session)
## [1] <NA> 1    2    3    4   
## Levels: 1 2 3 4
# Changement de l'ordre des colonnes pour garder le même ordre pour les covariables que segdata
predata <- predata[, c(1:6, 11:15, 7:10)]

On récupère les données suivantes :

  • obsdata, segdateet predata
  • distdata, une jointure entre predata et obsdata sur le segment

1.3 Centrage-réduction

On centre et réduit les covariables (présentes dans distdata, segdata et predata).

# On récupère mean et sd de segdata pour les colonnes 15 à 23
cov_names_mean_sd <- matrix(rep(NA, 9*2), ncol = 9)
colnames(cov_names_mean_sd) <- names(segdata[, 15:23])
rownames(cov_names_mean_sd) <- c("mean", "sd")

# moyenne
for (i in 1:9){
    cov_names_mean_sd[1,i] = mean(segdata[,i+14], na.rm = TRUE)
}
# sd
for (i in 1:9){
    cov_names_mean_sd[2,i] = sd(segdata[,i+14], na.rm = TRUE)
}
cov_names_mean_sd
##      CHL_4w_mea CHL_4w_sd SST_4w_mea SST_4w_sd POC_4w_mea    depth distCoast
## mean  1.3016453 0.4070661  15.865554 0.8412628  211.17688 51.14547  38510.47
## sd    0.8536337 0.3629038   3.251786 0.4386712   94.86176 36.32341  30059.10
##       dist200   slopeP
## mean 88112.13 28913.18
## sd   32477.64 27947.08
# On centre-réduit les données de segdata, distdata et predata avec la moyenne et l'écart type de chaque covariable dans segdata

# segdata
for (i in 1:9){
    # On récupère la colonne du jeu de données non centré-réduit
    column <- as.data.frame(segdata[, grep(colnames(cov_names_mean_sd)[i], colnames(segdata))])
    
    # On récupère la moyenne et l'écart type pour cette covariable
    mean_cov <- cov_names_mean_sd[1, grep(colnames(cov_names_mean_sd)[i], colnames(cov_names_mean_sd))]
    sd_cov <- cov_names_mean_sd[2, grep(colnames(cov_names_mean_sd)[i], colnames(cov_names_mean_sd))]
    
    # On applique le centrage-réduction
    column <- apply(X = column, 
                                    MARGIN = 1, 
                                    FUN =  function(valeur){
                                        return((valeur - mean_cov)/sd_cov)
                                    }
    )
    segdata[i+14] = column
}

# distdata
for (i in 1:9){
    # On récupère la colonne du jeu de données non centré-réduit
    column <- as.data.frame(distdata[, grep(colnames(cov_names_mean_sd)[i], colnames(distdata))])
    
    # On récupère la moyenne et l'écart type pour cette covariable
    mean_cov <- cov_names_mean_sd[1, grep(colnames(cov_names_mean_sd)[i], colnames(cov_names_mean_sd))]
    sd_cov <- cov_names_mean_sd[2, grep(colnames(cov_names_mean_sd)[i], colnames(cov_names_mean_sd))]
    
    # On applique le centrage-réduction
    column <- apply(X = column, 
                                    MARGIN = 1, 
                                    FUN =  function(valeur){
                                        return((valeur - mean_cov)/sd_cov)
                                    }
    )
    distdata[i+19] = column
}

# predata
for (i in 1:9){
    # On récupère la colonne du jeu de données non centré-réduit
    column <- as.data.frame(predata[, grep(colnames(cov_names_mean_sd)[i], colnames(predata))])
    
    # On récupère la moyenne et l'écart type pour cette covariable
    mean_cov <- cov_names_mean_sd[1, grep(colnames(cov_names_mean_sd)[i], colnames(cov_names_mean_sd))]
    sd_cov <- cov_names_mean_sd[2, grep(colnames(cov_names_mean_sd)[i], colnames(cov_names_mean_sd))]
    
    # On applique le centrage-réduction
    column <- apply(X = column, 
                                    MARGIN = 1, 
                                    FUN =  function(valeur){
                                        return((valeur - mean_cov)/sd_cov)
                                    }
    )
    predata[i+6] = column
}

1.4 Longitude, latitude en X et Y en lambert93

predata

predata_save <- predata

## On récupère les coordonnées et on les transforme en Lambert 93
coordinates(predata) <- c("longitude", "latitude")
proj4string(predata) <- CRS("+init=epsg:4326") # Actuellement, epsg = 4326 : WGS84

## On créé un predata temporaire avec toutes les informations nécessaires
predata_l93 <- spTransform(predata, CRS("+init=epsg:2154")) # on veut, epsg = 2154 : Lambert93

## On récupère les coordonnées en X et Y
coord_l93 <- as.data.frame(coordinates(predata_l93))

## On remplace dans predata les X, Y faux par les nouvelles coordonnées X, Y en L93
predata <- predata_save
predata$X <- coord_l93$longitude
predata$Y <- coord_l93$latitude

distdata

distdata_save <- distdata

## On récupère les coordonnées et on les transforme en Lambert 93
coordinates(distdata) <- c("longitude", "latitude")
proj4string(distdata) <- CRS("+init=epsg:4326") # Actuellement, epsg = 4326 : WGS84

## On créé un predata temporaire avec toutes les informations nécessaires
distdata_l93 <- spTransform(distdata, CRS("+init=epsg:2154")) # on veut, epsg = 2154 : Lambert93

## On récupère les coordonnées en X et Y
coord_l93 <- as.data.frame(coordinates(distdata_l93))

## On remplace dans predata les X, Y faux par les nouvelles coordonnées X, Y en L93
distdata <- distdata_save
distdata$X <- coord_l93$longitude
distdata$Y <- coord_l93$latitude

gridata

gridata_save <- gridata
## On récupère les coordonnées et on les transforme en Lambert 93
coordinates(gridata) <- c("lon", "lat")
proj4string(gridata) <- CRS("+init=epsg:4326") # Actuellement, epsg = 4326 : WGS84

## On créé un predata temporaire avec toutes les informations nécessaires
gridata_l93 <- spTransform(gridata, CRS("+init=epsg:2154")) # on veut, epsg = 2154 : Lambert93

## On récupère les coordonnées en X et Y
coord_l93 <- as.data.frame(coordinates(gridata_l93))

## On remplace dans predata les X, Y faux par les nouvelles coordonnées X, Y en L93
gridata <- gridata_save
gridata$X <- coord_l93$lon
gridata$Y <- coord_l93$lat

2 Fonction de détection

detfc.sea.hr <- Distance::ds(
                        distdata,
                        max(distdata$distance),
                        formula = ~seaState,
                        key = "hr")

3 Fonction de densité (covariables communes par session)

3.1 Ajustement de la fonction de densité

dsm pour \(availability\) dépendante de on-shelf et off-shelf : On note “on-shelf” quand la profondeur est inférieure à 150m, et “off-shelf” si la profondeur est supérieure à 150m.

\[availability_{off-shelf}=0,1357617\] \[availability_{on-shelf}=0,6332016\]

segdata_tmp <- segdata %>% filter(month(date) == 5 | month(date) == 6)
obsdata_tmp <- obsdata %>% filter(session == 2)

# On choisit s(X, Y)
dsm_s2_av1 <- dsm(
                formula = count ~ s(SST_4w_mea) + s(X, Y) + s(CHL_4w_mea),
                ddf.obj = detfc.sea.hr,
                segment.data = segdata_tmp,
                observation.data = obsdata_tmp,
                method = 'REML',
                family = nb(),
                engine = 'gam',
                gamma = 1.4,
                availability = 1)


segdata_tmp <- segdata %>% filter(month(date) == 7 | month(date) == 8)
obsdata_tmp <- obsdata %>% filter(session == 3)

dsm_s3_av1 <- dsm(
                formula = count ~ s(SST_4w_mea) + s(X, Y) + s(CHL_4w_mea),
                ddf.obj = detfc.sea.hr,
                segment.data = segdata_tmp,
                observation.data = obsdata_tmp,
                method = 'REML',
                family = nb(),
                engine = 'gam',
                gamma = 1.4,
                availability = 1)


segdata_tmp <- segdata %>% filter(month(date) == 5 | month(date) == 6)
obsdata_tmp <- obsdata %>% filter(session == 2)

dsm_s2_av041 <- dsm(
                formula = count ~ s(SST_4w_mea) + s(X, Y) + s(CHL_4w_mea),
                ddf.obj = detfc.sea.hr,
                segment.data = segdata_tmp,
                observation.data = obsdata_tmp,
                method = 'REML',
                family = nb(),
                engine = 'gam',
                gamma = 1.4,
                availability = 0.41)


segdata_tmp <- segdata %>% filter(month(date) == 7 | month(date) == 8)
obsdata_tmp <- obsdata %>% filter(session == 3)

dsm_s3_av041 <- dsm(
                formula = count ~ s(SST_4w_mea) + s(X, Y) + s(CHL_4w_mea),
                ddf.obj = detfc.sea.hr,
                segment.data = segdata_tmp,
                observation.data = obsdata_tmp,
                method = 'REML',
                family = nb(),
                engine = 'gam',
                gamma = 1.4,
                availability = 0.41)

# On-shelf/off-shelf

distdata$availability = NA
for (i in 1:nrow(distdata)) {
    if (distdata$depth[i] <= 150) {
        distdata$availability[i] = 0.6332016
    } else{
        distdata$availability[i] = 0.1357617
    }
}

segdata_tmp <- segdata %>% filter(month(date) == 5 | month(date) == 6)
obsdata_tmp <- obsdata %>% filter(session == 2)
availability <- (distdata %>% filter(session == 2))$availability

dsm_s2_avshelf <- dsm(
                formula = count ~ s(SST_4w_mea) + s(X, Y) + s(CHL_4w_mea),
                ddf.obj = detfc.sea.hr,
                segment.data = segdata_tmp,
                observation.data = obsdata_tmp,
                method = 'REML',
                family = nb(),
                engine = 'gam',
                gamma = 1.4,
                availability = availability)

segdata_tmp <- segdata %>% filter(month(date) == 7 | month(date) == 8)
obsdata_tmp <- obsdata %>% filter(session == 3)
availability <- (distdata %>% filter(session == 3))$availability

dsm_s3_avshelf <- dsm(
                formula = count ~ s(SST_4w_mea) + s(X, Y) + s(CHL_4w_mea),
                ddf.obj = detfc.sea.hr,
                segment.data = segdata_tmp,
                observation.data = obsdata_tmp,
                method = 'REML',
                family = nb(),
                engine = 'gam',
                gamma = 1.4,
                availability = availability)

4 Prédiction de l’abondance

predata_tmp2 <- predata %>% filter(session == 2)
predata_tmp3 <- predata %>% filter(session == 3)

dsm_s2_av1.pred <- predict(dsm_s2_av1, predata_tmp2, predata_tmp2$Area, se.fit = TRUE)
dsm_s3_av1.pred <- predict(dsm_s3_av1, predata_tmp3, predata_tmp3$Area, se.fit = TRUE)

dsm_s2_av041.pred <- predict(dsm_s2_av041, predata_tmp2, predata_tmp2$Area, se.fit = TRUE)
dsm_s3_av041.pred <- predict(dsm_s3_av041, predata_tmp3, predata_tmp3$Area, se.fit = TRUE)

dsm_s2_avshelf.pred <- predict(dsm_s2_avshelf, predata_tmp2, predata_tmp2$Area, se.fit = TRUE)
dsm_s3_avshelf.pred <- predict(dsm_s3_avshelf, predata_tmp3, predata_tmp3$Area, se.fit = TRUE)

4.1 Comparaison des résultats selon la valeur de disponibilité

res_abondance <- data.frame(
    "Session" = c(2, 3, 2, 3, 2, 3),
    "Disponibilité" = c("1", "1", "0.41", "0.41", "on-shelf/off-shelf", "on-shelf/off-shelf"),
    "Min" = c(
        sum(dsm_s2_av1.pred$fit - (dsm_s2_av1.pred$se.fit)),
        sum(dsm_s3_av1.pred$fit - (dsm_s3_av1.pred$se.fit)),
        sum(dsm_s2_av041.pred$fit - (dsm_s2_av1.pred$se.fit)),
        sum(dsm_s3_av041.pred$fit - (dsm_s3_av1.pred$se.fit)),
        sum(dsm_s2_avshelf.pred$fit - (dsm_s2_av1.pred$se.fit)),
        sum(dsm_s3_avshelf.pred$fit - (dsm_s3_av1.pred$se.fit))
    ),
    "Estimation" = c(
        sum(dsm_s2_av1.pred$fit),
        sum(dsm_s3_av1.pred$fit),
        sum(dsm_s2_av041.pred$fit),
        sum(dsm_s3_av041.pred$fit),
        sum(dsm_s2_avshelf.pred$fit),
        sum(dsm_s3_avshelf.pred$fit)
    ),
    "Max" = c(
        sum(dsm_s2_av1.pred$fit + (dsm_s2_av1.pred$se.fit)),
        sum(dsm_s3_av1.pred$fit + (dsm_s3_av1.pred$se.fit)),
        sum(dsm_s2_av041.pred$fit + (dsm_s2_av1.pred$se.fit)),
        sum(dsm_s3_av041.pred$fit + (dsm_s3_av1.pred$se.fit)),
        sum(dsm_s2_avshelf.pred$fit + (dsm_s2_av1.pred$se.fit)),
        sum(dsm_s3_avshelf.pred$fit + (dsm_s3_av1.pred$se.fit))
    )
)

data.table::data.table(res_abondance)
##    Session      Disponibilité       Min Estimation       Max
## 1:       2                  1 2923.0908   4385.810  5848.529
## 2:       3                  1  330.3391   1455.760  2581.181
## 3:       2               0.41 9044.6664  10507.386 11970.105
## 4:       3               0.41 2382.8334   3508.254  4633.675
## 5:       2 on-shelf/off-shelf 5385.5652   6848.284  8311.004
## 6:       3 on-shelf/off-shelf 1131.1234   2256.544  3381.965
res_abondance <- data.frame(
    "Session" = c(2, 3, 2, 3, 2, 3),
    "Disponibilité" = c("1", "1", "0.41", "0.41", "on-shelf/off-shelf", "on-shelf/off-shelf"),
    "Estimation" = c(
        sum(dsm_s2_av1.pred$fit),
        sum(dsm_s3_av1.pred$fit),
        sum(dsm_s2_av041.pred$fit),
        sum(dsm_s3_av041.pred$fit),
        sum(dsm_s2_avshelf.pred$fit),
        sum(dsm_s3_avshelf.pred$fit)
    ),
    "Plus ou moins" = c(
        sum(dsm_s2_av1.pred$se.fit),
        sum(dsm_s3_av1.pred$se.fit),
        sum(dsm_s2_av041.pred$se.fit),
        sum(dsm_s3_av041.pred$se.fit),
        sum(dsm_s2_avshelf.pred$se.fit),
        sum(dsm_s3_avshelf.pred$se.fit)
    )
)

data.table::data.table(res_abondance)
##    Session      Disponibilité Estimation Plus.ou.moins
## 1:       2                  1   4385.810      1462.719
## 2:       3                  1   1455.760      1125.421
## 3:       2               0.41  10507.386      4181.951
## 4:       3               0.41   3508.254      3378.258
## 5:       2 on-shelf/off-shelf   6848.284      2480.499
## 6:       3 on-shelf/off-shelf   2256.544      1965.532

4.2 Cartes

# Création de la carte vide
empty.map <- ggmap(get_stamenmap(
    bbox = make_bbox(
        lon = c(min(distdata$longitude), max(distdata$longitude)+0.8),
        lat = c(min(distdata$latitude), max(distdata$latitude)),
        f = 0.4
    ),
    zoom = 8,
    maptype = "toner-lite"
))

4.2.1 Session 2 et availability = 1

map <- get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s2_av1.pred$fit,
    predata_tmp = predata_tmp2,
    session_selec = 2,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = FALSE,
    observations = FALSE,
    poster = TRUE
)

png("./img/dsm_s2_av1.pred.png",
        width = 1800,
        height = 1200)
map
dev.off()
## png 
##   2

4.2.2 Session 3 et availability = 1

map <- get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s3_av1.pred$fit,
    predata_tmp = predata_tmp3,
    session_selec = 3,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = FALSE,
    observations = FALSE,
    poster = TRUE
)

png("./img/dsm_s3_av1.pred.png", width = 1800, height = 1200)
map
dev.off()
## png 
##   2

4.2.3 Session 2 et availability = 0.41

map <- get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s2_av041.pred$fit,
    predata_tmp = predata_tmp2,
    session_selec = 2,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = FALSE,
    observations = FALSE,
    poster = TRUE
)

png("./img/dsm_s2_av041.pred.png", width = 1800, height = 1200)
map
dev.off()
## png 
##   2

4.2.4 Session 3 et availability = 0.41

map <- get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s3_av041.pred$fit,
    predata_tmp = predata_tmp3,
    session_selec = 3,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = FALSE,
    observations = FALSE,
    poster = TRUE
)

png("./img/dsm_s3_av041.pred.png",
        width = 1800,
        height = 1200)
map
dev.off()
## png 
##   2

4.2.5 Session 2 et availability = off/on shelf

map <- get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s2_avshelf.pred$fit,
    predata_tmp = predata_tmp2,
    session_selec = 2,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = FALSE,
    observations = FALSE,
    poster = TRUE
)

png("./img/dsm_s2_avshelf.pred$fit.png",
        width = 1800,
        height = 1200)
map
dev.off()
## png 
##   2

4.2.6 Session 3 et availability = off/on shelf

map <- get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s3_avshelf.pred$fit,
    predata_tmp = predata_tmp3,
    session_selec = 3,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = FALSE,
    observations = FALSE,
    poster = TRUE
)

png("./img/dsm_s3_avshelf.pred.png",
        width = 1800,
        height = 1200)
map
dev.off()
## png 
##   2

4.3 Carte pour le poster

#dsm_s3_avshelf.pred.2 <- (dsm_s3_avshelf.pred/sum(dsm_s3_avshelf.pred))*100

get_map_abundance(
    empty.map = empty.map,
    dsm.pred = dsm_s3_avshelf.pred$fit,
    predata_tmp = predata_tmp3,
    session_selec = 3,
    segdata = segdata,
    distdata = distdata,
    abondance = TRUE,
    transects = TRUE,
    observations = TRUE,
    poster = FALSE
)

5 Exportation des résultats en Rdata

# Fonction de détection
save(detfc.sea.hr,
         file = "resultats/detfc.Rdata")

# Modèles : dsm
save(dsm_s2_av1, dsm_s2_av041, dsm_s2_avshelf,
         dsm_s3_av1, dsm_s3_av041, dsm_s3_avshelf,
         file = "resultats/modeles_dsm.RData")

# Résultats de la prédiction : dsm.pred
save(dsm_s2_av1.pred, dsm_s2_av041.pred, dsm_s2_avshelf.pred,
         dsm_s3_av1.pred, dsm_s3_av041.pred, dsm_s3_avshelf.pred,
         file = "resultats/modeles_dsm.pred.RData")

# Données nettoyées
save(obsdata, predata, predata_tmp2, predata_tmp3, segdata, distdata,
         file = "../data/donnees_nettoyees.RData")